In [1]:
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
In [2]:
df = pd.read_csv("density_clus2.csv")
df.head()
Out[2]:
G1_kVA/km G1_Customers/km G2_kVA/km G2_Customers/km G3_kVA/km G3_Customers/km G4_kVA/km G4_Customers/km
0 4313.976565 33.399235 1862.268741 374.596046 1781.155627 79.110583 1653.390696 267.928974
1 4999.080363 6.422016 1250.887617 251.552125 1113.283437 216.461350 6861.336361 3.108008
2 4427.403090 23.219377 1747.982603 403.100475 1068.699000 183.344920 3823.443228 6.319578
3 5734.095832 4.980156 1586.394505 87.832745 737.681555 110.138170 931.246525 88.690145
4 7472.555055 4.415658 4458.329485 17.833318 672.131687 269.142386 211.275854 1.207291
In [3]:
df_sep = pd.DataFrame([])
for i in range(1,5):
    df_temp = df[[f'G{i}_kVA/km',f'G{i}_Customers/km']]
    df_temp.columns =  ['kVA/km','Customers/km']
    df_temp['group'] = i
    df_temp = df_temp.dropna()#.sample(n=200)
    df_sep = pd.concat([df_sep,df_temp])
/tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['group'] = i
/tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['group'] = i
/tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['group'] = i
/tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['group'] = i
In [4]:
df_sep[['kVA/km_log','Customers/km_log']] = np.log(df_sep[['kVA/km','Customers/km']])
In [5]:
from matplotlib import animation
from IPython.display import display,HTML
from sklearn.cluster import KMeans
from functools import partial
In [6]:
def update_group(k,data,feature,group):
    model = KMeans(n_clusters=k).fit(data[feature].values)
    clus = model.predict(data[feature].values)
    for a in ax:
        a.clear()
    sns.scatterplot(data=data,x='kVA/km',y='Customers/km',hue=clus,palette=sns.color_palette()[:k],linewidth=0,ax=ax[1])
    sns.kdeplot(data=data,x='kVA/km',hue=clus,palette=sns.color_palette()[:k],common_norm=False,ax=ax[0],warn_singular=False)
    sns.kdeplot(data=data,y='Customers/km',hue=clus,palette=sns.color_palette()[:k],common_norm=False,ax=ax[2],warn_singular=False)
    ax[0].get_legend().remove()
    ax[2].get_legend().remove()
    
    fig.suptitle(f'Group {group} KMean k = {k}')
    #plt.close()
In [7]:
#update_group(4,data=df_g[['kVA/km','Customers/km']])
In [8]:
#%config InlineBackend.figure_formats = ['svg']
In [9]:
plt.rcParams['figure.dpi'] = 150

Group1¶

In [10]:
group = 1
In [11]:
df_g = df_sep[df_sep['group'] == group]
In [12]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km','Customers/km'],group=group), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[12]:

Kmean with log transform¶

In [13]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km_log','Customers/km_log'],group=group), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[13]:
In [ ]:
 

Group 2,3¶

In [14]:
#group = 1
In [15]:
df_g = df_sep[(df_sep['group'] == 2) | (df_sep['group'] == 3)].reset_index(drop=True)
In [16]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km','Customers/km'],group='2,3'), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[16]:

Kmean with log transform¶

In [17]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km_log','Customers/km_log'],group='2,3'), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[17]:
In [ ]:
 

Group4¶

In [18]:
group = 4
In [19]:
df_g = df_sep[df_sep['group'] == group]
In [20]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km','Customers/km'],group=group), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[20]:

Kmean with log transform¶

In [21]:
fig = plt.figure(figsize=(10,10))
gs = fig.add_gridspec(4, 4)
ax = []
ax.append(fig.add_subplot(gs[0, :-1]))
ax.append(fig.add_subplot(gs[1:, :-1]))
ax.append(fig.add_subplot(gs[1:, -1]))
anim_gauss = animation.FuncAnimation(fig, partial(update_group,data=df_g,feature=['kVA/km_log','Customers/km_log'],group=group), frames=range(1,10),interval=500, repeat=True)
plt.close()
HTML(anim_gauss.to_jshtml())
Out[21]:
In [ ]: